## Covid-19 Impact on Airlines Industry ##
# Businesses across industries all over the world are facing a crisis due to the global COVID-19 pandemic, and the airline industry is no exception. There has been some improvement in the last few months, but how large is the impact, and how long will it take to return to the previous situation? To analyze this I will use the last 10 years of passenger, revenue, load, and other data, examine their trend, seasonality, and level using time-series plots, and finally predict the next 2 years of values using methods such as ARIMA, additive seasonality, and multiplicative (exponential) seasonality.
#Load the require library.
library(readxl)
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2 3.3.3 ✓ fma 2.4
## ✓ forecast 8.14 ✓ expsmooth 2.3
##
library(forecast)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(dygraphs)
library(xts)
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(latticeExtra)
## Loading required package: lattice
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
library(TSstudio)
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
# U.S. city passenger data for both international and domestic flights.
# Load the dataset (the first spreadsheet row is a title, hence skip = 1).
Passenger_data <- read_excel("Passengers_3_19_2021.xlsx",skip = 1)
# Print the first 6 rows of the dataset.
head(Passenger_data)
# Check the class of the attributes present in the dataset.
str(Passenger_data)
## tibble [242 × 5] (S3: tbl_df/tbl/data.frame)
## $ Year : chr [1:242] "2002" "2002" "2002" "2002" ...
## $ Month : chr [1:242] "10" "11" "12" "TOTAL" ...
## $ DOMESTIC : num [1:242] 4.81e+07 4.49e+07 4.97e+07 5.52e+08 4.30e+07 ...
## $ INTERNATIONAL: num [1:242] 9.58e+06 9.02e+06 1.00e+07 1.19e+08 9.73e+06 ...
## $ TOTAL : num [1:242] 5.76e+07 5.39e+07 5.97e+07 6.71e+08 5.28e+07 ...
anyNA(Passenger_data) # True
## [1] TRUE
dim(Passenger_data) # 242 Rows 5 Columns.
## [1] 242 5
# Drop the yearly "TOTAL" summary rows (every 13th row: 4, 17, ..., 225) and the
# trailing rows 238-242 (partial/NA footer rows); they would distort the monthly series.
# NOTE(review): these indices are hard-coded to this spreadsheet's layout -- a
# Month != "TOTAL" filter plus an NA drop would be more robust if the file changes.
Passenger_data2 <- Passenger_data[-c(4,17,30,43,56,69,82,95,108,121,134,147,160,173,186,
199,212,225,238:242),]
dim(Passenger_data2)
## [1] 219 5
# The cleaned dataset has 219 rows (219 months of passenger counts) and 5 columns.
# Month and Year live in separate columns; paste them into a single first-of-month
# Date column so the frame can be ordered as a time series.
Passenger_data2$Date <- as.Date(paste(month.abb[as.numeric(Passenger_data2$Month)], "01",
Passenger_data2$Year, sep="-"),format = "%b-%d-%Y")
# Plot domestic, international, and total counts together (columns 3:5) to
# compare their trends over the years.
Passenger_Data_xts <- xts( x=Passenger_data2[,c(3:5)], order.by = Passenger_data2$Date)
ts_plot(Passenger_Data_xts)
cor(Passenger_data2$DOMESTIC,Passenger_data2$INTERNATIONAL)
## [1] 0.9041159
#The plot and the correlation value (0.90) show that domestic and international passenger counts are highly correlated, so I do not need to model both series separately and can do further analysis on the total passenger count alone. One more note: because the COVID-19 shock does not follow any regular cycle, a model trained on past data will not be able to predict the actual 2020 passenger counts -- it effectively treats them as outliers, so the prediction error for 2020 will be high. I will therefore forecast 2020 from pre-2020 data and use the gap from the actual values to measure the total impact of COVID-19.
## ARIMA and Seasonality Models ##
# Build 12 monthly dummy columns for the regression models. outer() compares a
# repeated month vector against month.abb, giving a 228 x 12 TRUE/FALSE matrix;
# "+0" coerces logical to numeric 0/1. 228 rows = 19 whole Jan-Dec cycles; the
# passenger series starts in October, so the first 9 rows (Jan-Sep of the first
# cycle) are dropped, leaving 219 rows aligned with the data.
Month <- data.frame(outer(rep(month.abb,length=228),month.abb,"==")+0)
Month <- Month[10:228,]
head(Month)
# Use month abbreviations as column names instead of X1..X12 for readability.
colnames(Month) <- month.abb
head(Month)
# Combine Date (column 6) and TOTAL (column 5) with the dummy columns.
Passenger_data3 <- data.frame(Passenger_data2[,c(6,5)],Month)
head(Passenger_data3)
# Add deterministic trend terms and a log response so both additive and
# multiplicative seasonality models can be fitted from the same frame.
# seq_len(nrow(...)) replaces the hard-coded 1:219 so the index stays correct
# if the number of retained months ever changes.
Passenger_data3["t"] <- seq_len(nrow(Passenger_data3)) # Linear trend index (1..219 here).
Passenger_data3["t_squared"] <- Passenger_data3["t"]^2 # Quadratic trend term.
Passenger_data3["log_Passengers"] <- log(Passenger_data3["TOTAL"]) # Response for multiplicative models.
# Hold out the 24 pre-COVID months (rows 184-207) as the test window; rows
# 208-219 (the 2020 months) are reserved for the final COVID-impact forecast.
train_set <- Passenger_data3[1:183,]
test_set <- Passenger_data3[184:207,]
# Model Based Forecasting Analysis
# NOTE(review): attach() is generally discouraged; every lm() below already
# passes data = train_set explicitly, so the attached frame is not relied upon.
attach(train_set)
# Multiplicative Seasonality: regress log(passengers) on the 12 month dummies.
# With all 12 dummies plus an intercept, one coefficient (Dec) is aliased --
# hence the "1 not defined because of singularities" note in the summary below.
Multi_Season <- lm(log_Passengers ~ Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Multi_Season)
##
## Call:
## lm(formula = log_Passengers ~ Jan + Feb + Mar + Apr + May + Jun +
## Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20684 -0.04749 -0.00781 0.03422 0.19812
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.01764 0.02119 850.356 < 2e-16 ***
## Jan -0.09328 0.03046 -3.062 0.00255 **
## Feb -0.14866 0.03046 -4.880 2.41e-06 ***
## Mar 0.06074 0.03046 1.994 0.04774 *
## Apr 0.01825 0.03046 0.599 0.54980
## May 0.05625 0.03046 1.847 0.06653 .
## Jun 0.10432 0.03046 3.425 0.00077 ***
## Jul 0.15536 0.03046 5.100 8.94e-07 ***
## Aug 0.12243 0.03046 4.019 8.73e-05 ***
## Sep -0.04118 0.03046 -1.352 0.17823
## Oct 0.01825 0.02996 0.609 0.54338
## Nov -0.03857 0.02996 -1.287 0.19974
## Dec NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08475 on 171 degrees of freedom
## Multiple R-squared: 0.5178, Adjusted R-squared: 0.4868
## F-statistic: 16.69 on 11 and 171 DF, p-value: < 2.2e-16
# Score the model on the held-out test window.
Multi_Season_pred <- data.frame(predict(Multi_Season,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Multi_Season, newdata = test_set, interval = "predict"):
## prediction from a rank-deficient fit may be misleading
# The model was fitted on log(TOTAL), so back-transform the point forecasts.
Multi_Season_pred1 <- exp(Multi_Season_pred$fit)
# Mean Absolute Percentage Error. Metrics::mape(actual, predicted) -- the actual
# series must come first, matching every other mape() call in this script.
# (The previously reported 0.2605652 was computed with the arguments swapped,
# which divides by the predictions instead of the actuals; MAPE is not symmetric.)
mape(test_set$TOTAL,Multi_Season_pred1)
############# Multiplicative Seasonality with linear trend ###########
# Same month dummies plus the linear trend term t, still on the log response.
Multi_Season_linear <- lm(log_Passengers~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Multi_Season_linear)
##
## Call:
## lm(formula = log_Passengers ~ t + Jan + Feb + Mar + Apr + May +
## Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.099405 -0.039952 -0.008208 0.041053 0.101059
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.790e+01 1.358e-02 1318.204 < 2e-16 ***
## t 1.279e-03 6.746e-05 18.959 < 2e-16 ***
## Jan -8.688e-02 1.731e-02 -5.018 1.31e-06 ***
## Feb -1.435e-01 1.731e-02 -8.291 3.27e-14 ***
## Mar 6.457e-02 1.731e-02 3.730 0.000261 ***
## Apr 2.081e-02 1.731e-02 1.202 0.230971
## May 5.753e-02 1.731e-02 3.323 0.001090 **
## Jun 1.043e-01 1.731e-02 6.026 1.01e-08 ***
## Jul 1.541e-01 1.731e-02 8.900 8.10e-16 ***
## Aug 1.199e-01 1.731e-02 6.924 8.60e-11 ***
## Sep -4.501e-02 1.731e-02 -2.600 0.010141 *
## Oct 2.080e-02 1.703e-02 1.222 0.223536
## Nov -3.729e-02 1.703e-02 -2.190 0.029890 *
## Dec NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04817 on 170 degrees of freedom
## Multiple R-squared: 0.8452, Adjusted R-squared: 0.8342
## F-statistic: 77.33 on 12 and 170 DF, p-value: < 2.2e-16
# Predict on the held-out test window.
Multi_Season_linear_pred <- data.frame(predict(Multi_Season_linear,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Multi_Season_linear, newdata = test_set, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
# The response was log-transformed, so exponentiate the point forecasts.
Multi_Season_pred2 <- exp(Multi_Season_linear_pred$fit)
# Mean Absolute Percentage Error (actual first, then predicted).
mape(test_set$TOTAL,Multi_Season_pred2) #MAPE=0.09
## [1] 0.09396467
############## Additive Seasonality ################
# Regress the raw TOTAL passenger count on the 12 month dummies (Dec is
# aliased with the intercept, as before).
Add_season <- lm(TOTAL ~ Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Add_season)
##
## Call:
## lm(formula = TOTAL ~ Jan + Feb + Mar + Apr + May + Jun + Jul +
## Aug + Sep + Oct + Nov + Dec, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13013400 -3286729 -771218 2248384 13884905
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67061697 1462126 45.866 < 2e-16 ***
## Jan -5995821 2101938 -2.853 0.004873 **
## Feb -9307709 2101938 -4.428 1.69e-05 ***
## Mar 4153052 2101938 1.976 0.049784 *
## Apr 1216339 2101938 0.579 0.563570
## May 3905928 2101938 1.858 0.064852 .
## Jun 7344520 2101938 3.494 0.000606 ***
## Jul 11204047 2101938 5.330 3.06e-07 ***
## Aug 8677200 2101938 4.128 5.70e-05 ***
## Sep -2666768 2101938 -1.269 0.206266
## Oct 1283039 2067758 0.620 0.535756
## Nov -2474263 2067758 -1.197 0.233122
## Dec NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5849000 on 171 degrees of freedom
## Multiple R-squared: 0.5078, Adjusted R-squared: 0.4761
## F-statistic: 16.04 on 11 and 171 DF, p-value: < 2.2e-16
# Predict on the held-out test window. No back-transform is needed for the
# additive models since the response is the raw count.
Add_season_pred <- data.frame(predict(Add_season,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Add_season, newdata = test_set, interval = "predict"):
## prediction from a rank-deficient fit may be misleading
# Mean Absolute Percentage Error (actual first, then predicted).
mape(test_set$TOTAL,Add_season_pred$fit) #MAPE=0.20
## [1] 0.2035171
############## Additive Seasonality with linear trend ################
# Month dummies plus the linear trend term t on the raw counts.
Add_season_linear <- lm(TOTAL~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Add_season_linear)
##
## Call:
## lm(formula = TOTAL ~ t + Jan + Feb + Mar + Apr + May + Jun +
## Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5595370 -2885119 -613655 2986714 6161212
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58848878 935856 62.882 < 2e-16 ***
## t 88310 4650 18.992 < 2e-16 ***
## Jan -5554272 1193367 -4.654 6.52e-06 ***
## Feb -8954470 1193285 -7.504 3.30e-12 ***
## Mar 4417981 1193222 3.703 0.000288 ***
## Apr 1392959 1193177 1.167 0.244669
## May 3994238 1193150 3.348 0.001004 **
## Jun 7344520 1193140 6.156 5.21e-09 ***
## Jul 11115737 1193150 9.316 < 2e-16 ***
## Aug 8500580 1193177 7.124 2.84e-11 ***
## Sep -2931697 1193222 -2.457 0.015016 *
## Oct 1459659 1173775 1.244 0.215374
## Nov -2385953 1173748 -2.033 0.043632 *
## Dec NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3320000 on 170 degrees of freedom
## Multiple R-squared: 0.8423, Adjusted R-squared: 0.8312
## F-statistic: 75.69 on 12 and 170 DF, p-value: < 2.2e-16
# Predict on the held-out test window.
Add_season_linear_pred <- data.frame(predict(Add_season_linear,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Add_season_linear, newdata = test_set, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
# Mean Absolute Percentage Error (actual first, then predicted).
mape(test_set$TOTAL,Add_season_linear_pred$fit) #MAPE=0.09
## [1] 0.09655188
# Additive seasonality with a quadratic trend term (t_squared) on the raw counts.
Add_season_quad <- lm(TOTAL ~ t_squared + Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Add_season_quad)
##
## Call:
## lm(formula = TOTAL ~ t_squared + Jan + Feb + Mar + Apr + May +
## Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7682474 -2025930 -657654 2319984 7112758
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.143e+07 8.096e+05 75.880 < 2e-16 ***
## t_squared 4.810e+02 2.257e+01 21.315 < 2e-16 ***
## Jan -5.382e+06 1.100e+06 -4.890 2.32e-06 ***
## Feb -8.779e+06 1.100e+06 -7.978 2.10e-13 ***
## Mar 4.596e+06 1.100e+06 4.177 4.71e-05 ***
## Apr 1.572e+06 1.100e+06 1.429 0.154801
## May 4.174e+06 1.100e+06 3.794 0.000206 ***
## Jun 7.523e+06 1.100e+06 6.839 1.38e-10 ***
## Jul 1.129e+07 1.100e+06 10.266 < 2e-16 ***
## Aug 8.675e+06 1.100e+06 7.886 3.60e-13 ***
## Sep -2.761e+06 1.100e+06 -2.509 0.013027 *
## Oct 1.460e+06 1.082e+06 1.349 0.179081
## Nov -2.385e+06 1.082e+06 -2.204 0.028858 *
## Dec NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3061000 on 170 degrees of freedom
## Multiple R-squared: 0.866, Adjusted R-squared: 0.8565
## F-statistic: 91.54 on 12 and 170 DF, p-value: < 2.2e-16
# Predict on the held-out test window.
Add_season_quad_pred <- data.frame(predict(Add_season_quad,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Add_season_quad, newdata = test_set, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
Add_season_quad_pred
# Mean Absolute Percentage Error -- the lowest of all models tried (about 5%).
mape(test_set$TOTAL,Add_season_quad_pred$fit) #MAPE=0.05
## [1] 0.05164158
# The additive-seasonality-with-quadratic-trend model gave the lowest MAPE
# (0.05, i.e. about 5%), so refit that specification on all pre-2020 data
# (rows 1-207) as the final forecasting model for the 2020 passenger values.
Final_model <- lm(TOTAL ~ t_squared + Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,
data = Passenger_data3[1:207,])
summary(Final_model)
##
## Call:
## lm(formula = TOTAL ~ t_squared + Jan + Feb + Mar + Apr + May +
## Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = Passenger_data3[1:207,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7321102 -2242522 -561539 2726423 7775883
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.087e+07 7.837e+05 77.669 < 2e-16 ***
## t_squared 5.596e+02 1.705e+01 32.823 < 2e-16 ***
## Jan -5.668e+06 1.064e+06 -5.327 2.76e-07 ***
## Feb -9.214e+06 1.064e+06 -8.660 1.82e-15 ***
## Mar 4.616e+06 1.064e+06 4.339 2.30e-05 ***
## Apr 1.542e+06 1.064e+06 1.449 0.1489
## May 4.370e+06 1.064e+06 4.108 5.89e-05 ***
## Jun 7.730e+06 1.064e+06 7.266 8.79e-12 ***
## Jul 1.147e+07 1.064e+06 10.778 < 2e-16 ***
## Aug 8.748e+06 1.064e+06 8.224 2.79e-14 ***
## Sep -3.021e+06 1.064e+06 -2.839 0.0050 **
## Oct 1.488e+06 1.048e+06 1.419 0.1574
## Nov -2.535e+06 1.048e+06 -2.418 0.0165 *
## Dec NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3145000 on 194 degrees of freedom
## Multiple R-squared: 0.9036, Adjusted R-squared: 0.8977
## F-statistic: 151.6 on 12 and 194 DF, p-value: < 2.2e-16
# Forecast the 12 months of 2020. Row 207 is the last training observation, so
# the out-of-sample slice is rows 208:219 (row 208 = Oct 2002 + 207 months =
# Jan 2020, matching start = c(2020,1) used later). The original 207:219 slice
# produced 13 rows, overlapping the training set and misaligning with the
# h = 12 ARIMA error forecast that is added to these predictions.
Final_model_pred <- data.frame(predict(Final_model,newdata = Passenger_data3[208:219,],interval = "predict"))
## Warning in predict.lm(Final_model, newdata = Passenger_data3[208:219, ], :
## prediction from a rank-deficient fit may be misleading
# With the point forecasts in hand, model the autocorrelation remaining in the
# regression residuals so the forecast can be error-corrected.
resid <- Final_model$residuals
acf(resid, lag.max = 12)
# Several residual autocorrelations are significant, so fit an ARIMA model to
# the residual series to capture that remaining structure.
resid_a <- auto.arima(resid)
resid_a$residuals
## Time Series:
## Start = 1
## End = 207
## Frequency = 1
## [1] -4724.963 232541.193 3422318.346 95939.115 -241278.838
## [6] -3685617.915 -2690455.974 -775722.208 1115246.048 1724439.453
## [11] 1444921.690 110838.361 1257734.304 1560959.987 1623760.699
## [16] -36576.279 3871018.159 -1646275.475 671707.776 -2142933.103
## [21] 559434.023 902990.784 296821.535 -527566.945 1913229.924
## [26] 1904824.446 -166632.436 872218.072 1623997.462 949565.549
## [31] -1640691.376 -570355.896 21226.058 175837.497 -1249702.434
## [36] 289775.575 -1305917.458 1817079.277 -305569.828 1123857.325
## [41] 1056922.142 -193759.261 322869.640 -1375506.575 -824978.937
## [46] -1131571.445 -1043490.600 923047.899 572989.923 2497058.831
## [51] -718634.843 1102311.213 459396.456 177369.939 812311.281
## [56] -520862.530 -347448.297 -561465.406 1290051.318 -822935.947
## [61] -191390.003 1552675.248 -2655950.378 885472.773 3091179.783
## [66] -1281807.242 -2741202.385 -642534.141 -1071267.078 -1389780.472
## [71] -329126.541 -3192065.046 -463554.666 -1721573.983 359917.667
## [76] 185188.842 565892.838 -1698211.638 1010420.994 -2316292.342
## [81] -344018.928 1805680.842 262478.844 28113.132 -255568.283
## [86] -706644.856 -107079.185 1059475.980 -885816.447 109693.595
## [91] 405799.889 -520869.764 160251.883 391228.997 237216.351
## [96] 1936923.466 969677.296 -598917.304 -2019002.193 -201209.640
## [101] -749820.532 195509.743 453844.606 687336.238 -553927.425
## [106] 163812.866 -991761.149 2210646.940 -405078.373 -181849.703
## [111] -1248783.928 166329.998 1550059.202 -958954.318 -559357.838
## [116] -617297.340 -297549.603 -726973.339 897679.733 782025.612
## [121] -1241803.763 741076.933 -1494635.582 1034860.837 331773.390
## [126] -123810.838 -1296581.495 874953.838 -460957.319 -1615065.955
## [131] 762531.210 1490183.865 -307338.110 -1661690.383 2459285.614
## [136] -123572.762 -1770429.981 828931.422 825647.726 667450.427
## [141] -895114.730 -434000.115 545059.804 814389.336 135972.234
## [146] -1934297.405 1487397.822 321995.860 -1525227.755 843942.550
## [151] 1293536.434 982401.173 -597617.353 1065754.375 462402.083
## [156] 993625.898 717367.997 -1091208.093 -491236.141 -292949.083
## [161] -181454.954 -601586.826 -832153.536 1857842.263 875436.726
## [166] -470235.081 -1102514.922 2828363.147 -538221.319 -571034.933
## [171] -436204.837 -307148.613 -2516592.812 1029205.279 1601888.019
## [176] 873814.170 782667.686 152611.476 -97297.193 -2557734.075
## [181] 1875453.064 1155031.249 -1229760.064 -1190950.002 -891111.337
## [186] 2262686.426 1219645.536 1392190.016 1316016.077 376268.659
## [191] -516807.043 -3400766.729 1327013.313 1156557.129 -1696617.774
## [196] -1744087.225 -1893428.828 3895093.389 1142770.705 2146385.775
## [201] 562601.168 -429533.136 -868462.686 -1906864.489 664400.099
## [206] -2192843.259 3315856.472
acf(resid_a$residuals, lag.max = 12)
# The ARIMA residuals look clean; forecast the next 12 months of errors.
errors <- forecast(resid_a, h = 12,level = c(95,99))
future_errors <- data.frame(errors)
future_errors <- future_errors$Point.Forecast
# Final forecast = regression prediction + forecast ARIMA error correction.
# NOTE(review): Final_model_pred was built from Passenger_data3[207:219,]
# (13 rows) while future_errors holds 12 values; 12 does not recycle evenly
# into 13 -- confirm the prediction slice should be 208:219 so lengths align.
predicted_new_values <- Final_model_pred + future_errors
# Turn the combined 2020 forecast and the observed history into ts objects.
new_data <- ts(predicted_new_values$fit,start = c(2020,1),frequency = 12)
Passenger_ts <- ts(Passenger_data2$TOTAL,frequency = 12,start = c(2002,10))
# Visualization of the forecast data against the original data.
ts.plot(Passenger_ts,new_data,lty=c(1,3))
# As discussed earlier, the model extrapolates the pre-COVID trend and
# seasonality, so the large gap between the forecast and the actual 2020
# values measures the size of the COVID-19 shock.
# Apart from passenger counts, COVID-19 also affected other airline metrics: share prices, flight load capacity, and operating revenue. In the analysis below I use several plots to illustrate the pandemic's impact on each of these factors.
# Available seat miles (ASM) data: monthly seat-capacity figures.
Seat_miles <- read_excel("ASM_3_19_2021.xlsx", skip = 1)
head(Seat_miles)
# Drop the yearly TOTAL rows and trailing footer rows (same layout as the
# passenger file), then build a first-of-month Date from Month + Year.
Seat_miles <- Seat_miles[-c(4,17,30,43,56,69,82,95,108,121,134,147,160,173,186,199,212,225,238:242),]
Seat_miles$Date <- as.Date(paste(month.abb[as.numeric(Seat_miles$Month)], "01",
Seat_miles$Year, sep="-"),format = "%b-%d-%Y")
# Transform the TOTAL column into a monthly time series starting Oct 2002.
Seat_miles_ts <- ts(Seat_miles$TOTAL,frequency = 12,start = c(2002,10))
Seat_miles_ts
## Jan Feb Mar Apr May Jun Jul
## 2002
## 2003 99317997 87516250 99856670 92157440 93219540 97606466 104101068
## 2004 101731051 95910200 105154882 103440891 106797409 108748141 114350123
## 2005 106280547 97551813 110949520 107923815 112451405 113379629 119035642
## 2006 107123822 96524724 111307830 109033553 112604130 113970339 119423638
## 2007 110771884 99268003 114023483 111756132 115983557 117414438 123429196
## 2008 113938958 106676924 117077397 114418521 118412173 120159608 124570530
## 2009 105344938 95026030 108554895 106758880 109132438 112007956 118349963
## 2010 104357787 91689830 107827035 104265530 110840289 113921176 120855956
## 2011 107104256 96466295 113722053 111553990 114750873 118515657 125559166
## 2012 108291408 101554314 115120344 112323388 116747958 120214447 126527895
## 2013 110149774 99882887 117068914 115512012 121080671 124263270 130603381
## 2014 113094340 101472246 122208312 120303316 126050429 130597006 137900635
## 2015 120526397 107234117 128040048 128176918 134949901 138434941 147507867
## 2016 128071084 120304405 136780778 135107343 142856481 147275917 155012382
## 2017 136209324 121796024 142922750 143081846 149591202 154927014 163827137
## 2018 140739315 127539376 149200935 150287515 158579088 162978997 170698950
## 2019 147172573 131971576 155658262 155535987 162312621 165327547 172564923
## 2020 150776705 134693243 112498114 25835632 23679036 32485474 54192617
## Aug Sep Oct Nov Dec
## 2002 101015447 95524045 100254985
## 2003 104784188 97482939 100674203 96199891 101450217
## 2004 114666296 103588044 107552592 102505001 108188265
## 2005 117906661 107360330 108605023 103390483 108801114
## 2006 119090347 111692210 111397911 105630403 111289117
## 2007 123685645 113331427 115494069 110566860 115643225
## 2008 122872823 107867988 109057437 102123723 106607657
## 2009 116763700 102773665 103963092 98466852 104355417
## 2010 118622738 107487430 110039961 103472594 107639793
## 2011 121611950 111190401 112860499 104460187 111066357
## 2012 124442390 111862198 111322080 106592379 111807196
## 2013 129164109 116005052 118590046 111007511 116734165
## 2014 135825279 121639742 124954189 116269356 123839664
## 2015 145096612 130303931 132920025 123798157 132156240
## 2016 152978614 138094644 139350272 129712133 139245758
## 2017 161093752 141641683 146632681 134733444 144194938
## 2018 167836616 151886924 154624685 142881112 151590645
## 2019 171387256 154344286 157136615 145141865 155724954
## 2020 60185199 54131843 60940774 64989269 69731148
# Interactive plot of seat miles; the 2020 collapse is clearly visible above.
dygraph(Seat_miles_ts) %>%
dyOptions( drawPoints = TRUE, pointSize = 4 )
# Number of flights operated per month.
Flights_Data <- read_excel("Flights_3_19_2021.xlsx", skip = 1)
head(Flights_Data)
# Remove the yearly TOTAL rows (every 13th row: 4, 17, ..., 225) and the
# trailing footer rows.
summary_rows <- c(seq(4, 225, by = 13), 238:242)
Flights_Data <- Flights_Data[-summary_rows, ]
# Build a first-of-month Date column from the Month and Year columns.
Flights_Data$Date <- as.Date(
  paste(month.abb[as.numeric(Flights_Data$Month)], "01", Flights_Data$Year, sep = "-"),
  format = "%b-%d-%Y"
)
# Drop Year, Month, TOTAL, and Date from the values (columns 1, 2, 6, 5),
# index the remaining regional counts by Date, and plot interactively.
Flights_Data_xts <- xts(x = Flights_Data[, -c(1, 2, 6, 5)], order.by = Flights_Data$Date)
dygraph(Flights_Data_xts)
# Revenue passenger miles (RPM) data.
RPM_Data <- read_excel("RPM_3_19_2021.xlsx", skip = 1)
head(RPM_Data)
# Strip the yearly TOTAL rows (4, 17, ..., 225) and trailing footer rows,
# mirroring the cleanup applied to the other spreadsheets.
RPM_Data <- RPM_Data[-c(seq(4, 225, by = 13), 238:242), ]
# Derive a first-of-month Date column from Month + Year.
rpm_month <- month.abb[as.numeric(RPM_Data$Month)]
RPM_Data$Date <- as.Date(paste(rpm_month, "01", RPM_Data$Year, sep = "-"), format = "%b-%d-%Y")
# Index the regional columns (dropping Year, Month, TOTAL, Date) by Date and plot.
rpm_Data_xts <- xts(x = RPM_Data[, -c(1, 2, 6, 5)], order.by = RPM_Data$Date)
ts_plot(rpm_Data_xts)
# Operating revenue by geographic zone (quarterly data).
Operating_Rev_Data <- read_excel("Operating_Rev_3_19_2021.xlsx",skip = 1)
head(Operating_Rev_Data)
str(Operating_Rev_Data)
## tibble [106 × 8] (S3: tbl_df/tbl/data.frame)
## $ Year : chr [1:106] "2000" "2000" "2000" "2000" ...
## $ Quarter : chr [1:106] "1" "2" "3" "4" ...
## $ DOMESTIC : num [1:106] 23262183 25573067 25313087 24751473 98899811 ...
## $ LATIN AMERICA: num [1:106] 1639679 1626743 1776933 1761932 6805287 ...
## $ ATLANTIC : num [1:106] 2690229 3419032 3796968 3113734 13019963 ...
## $ PACIFIC : num [1:106] 2240370 2432840 2801208 2545414 10019832 ...
## $ INTERNATIONAL: num [1:106] 370524 390007 370491 372305 1503327 ...
## $ TOTAL : num [1:106] 3.02e+07 3.34e+07 3.41e+07 3.25e+07 1.30e+08 ...
# Drop the yearly summary rows (every 5th row: 5, 10, ..., 100) and the
# trailing rows 104-106, then coerce Year/Quarter from character to numeric.
Operating_Rev_Data <- Operating_Rev_Data[-c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,
80,85,90,95,100,104:106),]
Operating_Rev_Data$Quarter <- as.numeric(Operating_Rev_Data$Quarter)
Operating_Rev_Data$Year <- as.numeric(Operating_Rev_Data$Year)
# NOTE(review): attach() is discouraged and not needed here -- the columns are
# supplied via with() below.
attach(Operating_Rev_Data)
## The following object is masked from train_set:
##
## TOTAL
# Quarterly xts series per zone, indexed by year-quarter.
Operating_Rev_Data_ts <- with(Operating_Rev_Data, xts(cbind(DOMESTIC,`LATIN AMERICA`,
ATLANTIC,PACIFIC,INTERNATIONAL),
as.yearqtr(Year + (Quarter-1)/4)))
# Three dygraph style variants (filled, step + filled, stem) plus a plotly view.
dygraph(Operating_Rev_Data_ts) %>%
dyOptions( fillGraph=TRUE )
dygraph(Operating_Rev_Data_ts) %>%
dyOptions( stepPlot=TRUE, fillGraph=TRUE)
dygraph(Operating_Rev_Data_ts) %>%
dyOptions(stemPlot=TRUE)
ts_plot(Operating_Rev_Data_ts)
# Quarterly operating profit/loss by geographic zone (same layout as revenue).
Operating_PL_Data <- read_excel("Operating_PL_3_19_2021.xlsx", skip = 1)
head(Operating_PL_Data)
str(Operating_PL_Data)
## tibble [106 × 8] (S3: tbl_df/tbl/data.frame)
## $ Year : chr [1:106] "2000" "2000" "2000" "2000" ...
## $ Quarter : chr [1:106] "1" "2" "3" "4" ...
## $ DOMESTIC : num [1:106] 139162 1570500 548903 -267068 1991496 ...
## $ LATIN AMERICA: num [1:106] 58927 -86040 33056 6639 12581 ...
## $ ATLANTIC : num [1:106] -68918 250371 226224 -95916 311761 ...
## $ PACIFIC : num [1:106] -106308 154724 83675 5533 137623 ...
## $ INTERNATIONAL: num [1:106] 839 9541 45470 23388 79238 ...
## $ TOTAL : num [1:106] 23702 1899096 937327 -327425 2532700 ...
# Drop the yearly summary rows and trailing rows, then coerce Year/Quarter
# from character to numeric (as with the revenue data above).
Operating_PL_Data <- Operating_PL_Data[-c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,
80,85,90,95,100,104:106),]
Operating_PL_Data$Quarter <- as.numeric(Operating_PL_Data$Quarter)
Operating_PL_Data$Year <- as.numeric(Operating_PL_Data$Year)
# NOTE(review): attach() is discouraged and not needed -- with() below supplies
# the columns.
attach(Operating_PL_Data)
## The following objects are masked from Operating_Rev_Data:
##
## ATLANTIC, DOMESTIC, INTERNATIONAL, LATIN AMERICA, PACIFIC, Quarter,
## TOTAL, Year
## The following object is masked from train_set:
##
## TOTAL
# Quarterly xts series per zone, indexed by year-quarter.
Operating_PL_Data_ts <- with(Operating_PL_Data, xts(cbind(DOMESTIC,`LATIN AMERICA`,
ATLANTIC,PACIFIC,INTERNATIONAL),
as.yearqtr(Year + (Quarter-1)/4)))
ts_plot(Operating_PL_Data_ts)
# Share prices of major airline companies (one CSV export per ticker).
United_Airlines <- read.csv("UAL.csv",header = TRUE)
American_Airlines <- read.csv("AAL.csv",header = TRUE)
Delta_Airlines <- read.csv("DAL.csv",header = TRUE)
southwest_Airlines <- read.csv("LUV.csv",header = TRUE)
Lufthansa_Airlines <- read.csv("LHA.DE.csv",header = TRUE)
# Combine the Date column with each ticker's price column (column 2), dropping
# row 133 from the US listings. NOTE(review): Lufthansa is used without the
# row drop -- presumably LHA.DE has one fewer row than the US files; verify the
# frames actually line up before trusting the cbind.
Airlines_Shares <- as.data.frame(cbind(United_Airlines[-133,1:2],southwest_Airlines[-133,2],
Delta_Airlines[-133,2],American_Airlines[-133,2],
Lufthansa_Airlines[,2]))
head(Airlines_Shares)
# Give the combined frame readable column names.
colnames(Airlines_Shares) <- c("Date","United_Airlines","Southwest_Airlines","Delta_Airlines",
"American_Airlines","Lufthansa_Airlines")
Airlines_Shares$Date <- as.Date(Airlines_Shares$Date)
# Index all carriers' prices by Date and plot them together.
Shares_xts <- xts( x=Airlines_Shares[,-1], order.by=Airlines_Shares$Date)
ts_plot(Shares_xts)